##### This code investigates heterogeneity by global conflict, violence, and governance by origin country
### See email from Wifag Adnan on Nov 20, 2023 for more info on data (she compiled it)
### Author: Jonathan Zhang
### Date: Nov 20, 2023
# Updated Jan 22, 2024

library(haven)
library(readr)
library(data.table)
library(scales)
library(dplyr)
library(lfe)
library(stargazer)

cohort <- fread("H:/Zheng_10223/Joint/cohort_2025.csv")


# merge p98 and p99 for vetting
cohort$MainParent_Income_HH_MainParentAge45_49_pctparent[cohort$MainParent_Income_HH_MainParentAge45_49_pctparent==99]=98

conflict <- fread("H:/Zheng_10223/final_with_population_count.csv")

# Impute missing as zero only after the data starts being collected: 1989 for UCDP and 1997 for ACLED
conflict$UCDP_TotalFatalities[which(is.na(conflict$UCDP_TotalFatalities) & conflict$Year>=1989)] <- 0
conflict$ACLED_TotalFatalities[which(is.na(conflict$ACLED_TotalFatalities) & conflict$Year>=1997)] <- 0


# Child birth country name to merge to Wifag's dataset
conflict$CountryCode <- NA
conflict$CountryCode[which(conflict$Country=="Afghanistan")] <- 41004
conflict$CountryCode[which(conflict$Country=="Bangladesh")] <- 44050
conflict$CountryCode[which(conflict$Country=="Bosnia and Herzegovina")] <- 24070
conflict$CountryCode[which(conflict$Country=="Cambodia")] <- 43116
conflict$CountryCode[which(conflict$Country=="Chile")] <- 14152
conflict$CountryCode[which(conflict$Country=="China")] <- 42156
conflict$CountryCode[which(conflict$Country=="Congo, Democratic Republic of the")] <- 34180
conflict$CountryCode[which(conflict$Country=="Czechoslovakia")] <- 22902
conflict$CountryCode[which(conflict$Country=="El Salvador")] <- 12222
conflict$CountryCode[which(conflict$Country=="Ethiopia")] <- 32231
conflict$CountryCode[which(conflict$Country=="France")] <- 21250
conflict$CountryCode[which(conflict$Country=="Germany")] <- 21276
conflict$CountryCode[which(conflict$Country=="Ghana")] <- 31288
conflict$CountryCode[which(conflict$Country=="Guatemala")] <- 12320
conflict$CountryCode[which(conflict$Country=="Guyana")] <- 14328
conflict$CountryCode[which(conflict$Country=="Haiti")] <- 13332
conflict$CountryCode[which(conflict$Country=="Hong Kong")] <- 42344
conflict$CountryCode[which(conflict$Country=="Hungary")] <- 22348
conflict$CountryCode[which(conflict$Country=="India")] <- 44356
conflict$CountryCode[which(conflict$Country=="Iran")] <- 41364
conflict$CountryCode[which(conflict$Country=="Iraq")] <- 41368
conflict$CountryCode[which(conflict$Country=="Israel")] <- 41376
conflict$CountryCode[which(conflict$Country=="Jamaica")] <- 43388
conflict$CountryCode[which(conflict$Country=="Korea, South")] <- 42410
conflict$CountryCode[which(conflict$Country=="Laos")] <- 43418
conflict$CountryCode[which(conflict$Country=="Lebanon")] <- 41422
conflict$CountryCode[which(conflict$Country=="Nicaragua")] <- 12558
conflict$CountryCode[which(conflict$Country=="Pakistan")] <- 44586
conflict$CountryCode[which(conflict$Country=="Philippines")] <- 43608
conflict$CountryCode[which(conflict$Country=="Poland")] <- 22616
conflict$CountryCode[which(conflict$Country=="Portugal")] <- 24620
conflict$CountryCode[which(conflict$Country=="Romania")] <- 22642
conflict$CountryCode[which(conflict$Country=="Russian Federation")] <- 22643
conflict$CountryCode[which(conflict$Country=="Saudi Arabia")] <- 41682
conflict$CountryCode[which(conflict$Country=="Somalia")] <- 32706
conflict$CountryCode[which(conflict$Country=="South Africa, Republic of")] <- 35710
conflict$CountryCode[which(conflict$Country=="Sri Lanka")] <- 44144
conflict$CountryCode[which(conflict$Country=="Taiwan")] <- 42158
conflict$CountryCode[which(conflict$Country=="Thailand")] <- 43764
conflict$CountryCode[which(conflict$Country=="Trinidad and Tobago")] <- 13780
conflict$CountryCode[which(conflict$Country=="Ukraine")] <- 22804
conflict$CountryCode[which(conflict$Country=="United Kingdom")] <- 23826
conflict$CountryCode[which(conflict$Country=="United States")] <- 11840
conflict$CountryCode[which(conflict$Country=="Viet Nam")] <- 43704
conflict$CountryCode[which(conflict$Country=="Yugoslavia")] <- 22914

# There are two Congos:
cohort$birthcountry_child[which(cohort$birthcountry_child==34178)] <- 34180

# Merge on "birthcountry_child" and year prior to landing
cohort$LandingYear_lag1 <- cohort$LandingYear_Child-1
cohort <- merge(x=cohort, y=conflict, by.x=c("birthcountry_child", "LandingYear_lag1"), by.y=c("CountryCode", "Year"), all.x=T)

# Convert fatalities to per 100,000:
cohort$ACLED_TotalFatalities <- cohort$ACLED_TotalFatalities*100000/cohort$Population
cohort$UCDP_TotalFatalities <- cohort$UCDP_TotalFatalities*100000/cohort$Population

# Above median fatalities:
cohort$UCDP_TotalFatalities_abovemedian <- (cohort$UCDP_TotalFatalities>quantile(cohort$UCDP_TotalFatalities, 0.5, na.rm=T))

# Regressions
# Large Goldstein scale is good (no conflict)
baseline <- lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent, data=cohort[ImmigrationCategory=="Refugee"])
subsample <- lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent, data=cohort[ImmigrationCategory=="Refugee" & !is.na(GDELT_GoldsteinScale)])
GDELT <- lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent*GDELT_GoldsteinScale, data=cohort[ImmigrationCategory=="Refugee" & !is.na(GDELT_GoldsteinScale)])
ICEWS <- lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent*ICEWS_GoldsteinScale, data=cohort[ImmigrationCategory=="Refugee" & !is.na(GDELT_GoldsteinScale)])

stargazer(baseline, subsample, GDELT, ICEWS, type="text",
          add.lines=list(c("Absolute Upward Mobility:", predict(baseline, newdata=data.frame(MainParent_Income_HH_MainParentAge45_49_pctparent=25)), 
                           round(predict(subsample, newdata=data.frame(MainParent_Income_HH_MainParentAge45_49_pctparent=25)),1),
                           round(predict(GDELT, newdata=data.frame(MainParent_Income_HH_MainParentAge45_49_pctparent=25, GDELT_GoldsteinScale=quantile(cohort$GDELT_GoldsteinScale, 0.5, na.rm=T))),1),
                           round(predict(ICEWS, newdata=data.frame(MainParent_Income_HH_MainParentAge45_49_pctparent=25, ICEWS_GoldsteinScale=quantile(cohort$ICEWS_GoldsteinScale, 0.5, na.rm=T))),1)),
                         c("Goldstein: p25", "", "",
                           round(predict(GDELT, newdata=data.frame(MainParent_Income_HH_MainParentAge45_49_pctparent=25, GDELT_GoldsteinScale=mean(cohort$GDELT_GoldsteinScale[which(cohort$GDELT_GoldsteinScale<quantile(cohort$GDELT_GoldsteinScale,0.5,na.rm=T))], na.rm=T))),1),
                           round(predict(ICEWS, newdata=data.frame(MainParent_Income_HH_MainParentAge45_49_pctparent=25, ICEWS_GoldsteinScale=mean(cohort$ICEWS_GoldsteinScale[which(cohort$ICEWS_GoldsteinScale<quantile(cohort$ICEWS_GoldsteinScale,0.5,na.rm=T))], na.rm=T))),1)),
                         c("Goldstein: p75", "", "",
                          round(predict(GDELT, newdata=data.frame(MainParent_Income_HH_MainParentAge45_49_pctparent=25, GDELT_GoldsteinScale=mean(cohort$GDELT_GoldsteinScale[which(cohort$GDELT_GoldsteinScale>quantile(cohort$GDELT_GoldsteinScale,0.5,na.rm=T))], na.rm=T))),1),
                          round(predict(ICEWS, newdata=data.frame(MainParent_Income_HH_MainParentAge45_49_pctparent=25, ICEWS_GoldsteinScale=mean(cohort$ICEWS_GoldsteinScale[which(cohort$ICEWS_GoldsteinScale>quantile(cohort$ICEWS_GoldsteinScale,0.5,na.rm=T))], na.rm=T))),1))
          ))
rm(baseline, subsample, GDELT, ICEWS)

belowmedian <- cohort[ImmigrationCategory=="Refugee" & !is.na(GDELT_GoldsteinScale) & GDELT_GoldsteinScale<quantile(cohort$GDELT_GoldsteinScale,0.5,na.rm=T)]
abovemedian <- cohort[ImmigrationCategory=="Refugee" & !is.na(GDELT_GoldsteinScale) & GDELT_GoldsteinScale>quantile(cohort$GDELT_GoldsteinScale,0.5,na.rm=T)]

par(mar=c(5,5,2,2))

plot(belowmedian[, .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"], pch=19, col="black", xlab="Parent Total Household Income", ylab="Child Average Individual Income Percentile", ylim=c(38,80), cex.lab=2, cex.axis=2, cex=1.3)
abline(lm(belowmedian[, .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"][,c(2,1)]), col="black", lwd=3)
points(abovemedian[, .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"], pch=18, col="royalblue3", cex=1.5)
abline(lm(abovemedian[, .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"][,c(2,1)]), col="royalblue3", lwd=3)
legend(x=55, y=47, col=c("royalblue3", "black"), pch=c(18,19), title="Goldstein Conflict Scale            ", 
       c(paste("Above Median", " (Intercept=", round(lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent, data=abovemedian)$coef[1],1), ", Slope=", round(lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent, data=abovemedian)$coef[2],3), ")", sep=""),
         paste("Below Median", " (Intercept=", round(lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent, data=belowmedian)$coef[1],1), ", Slope=", round(lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent, data=belowmedian)$coef[2],3), ")", sep="")),
       bty="n", cex=1.5, pt.cex=c(1.5,1.3))

predict(lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent, data=belowmedian), newdata=data.frame(MainParent_Income_HH_MainParentAge45_49_pctparent=25))
predict(lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent, data=abovemedian), newdata=data.frame(MainParent_Income_HH_MainParentAge45_49_pctparent=25))
# Fot vetting bin counts:
below_counts <- belowmedian[, .(N=length(Child_Income_IND_30_34_pct)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"]; below_counts <- below_counts[order(MainParent_Income_HH_MainParentAge45_49_pctparent)]
above_counts <- abovemedian[, .(N=length(Child_Income_IND_30_34_pct)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"]; above_counts <- above_counts[order(MainParent_Income_HH_MainParentAge45_49_pctparent)]
fwrite(below_counts, "H:/Zheng_10223/ToVet/Supporting/Below_Counts.csv")
fwrite(above_counts, "H:/Zheng_10223/ToVet/Supporting/Above_Counts.csv")


###################################################################################

###################################################################################

cohort[, `:=` (MedianFatalities=quantile(.SD$UCDP_TotalFatalities, 0.5, na.rm=T)), by="birthcountry_child"]
cohort$AboveMedianFatalities <- cohort$UCDP_TotalFatalities>=cohort$MedianFatalities
belowmedian <- cohort[ImmigrationCategory=="Refugee" & AboveMedianFatalities==1]
abovemedian <- cohort[ImmigrationCategory=="Refugee" & AboveMedianFatalities==0]

par(mar=c(5,5,2,2))
plot(belowmedian[, .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"], pch=19, col="black", xlab="Parent Total Household Income", ylab="Child Average Individual Income Percentile", ylim=c(38,80), cex.lab=2, cex.axis=2, cex=1.3)
abline(lm(belowmedian[, .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"][,c(2,1)]), col="black", lwd=3)
points(abovemedian[!MainParent_Income_HH_MainParentAge45_49_pctparent %in% c(97,99), .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"], pch=18, col="royalblue3", cex=1.5)
abline(lm(abovemedian[!MainParent_Income_HH_MainParentAge45_49_pctparent %in% c(97,99), .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"][,c(2,1)]), col="royalblue3", lwd=3)
legend(x=55, y=47, col=c("royalblue3", "black"), pch=c(18,19), title="Total Fatalities (Within Country)            ", 
       c(paste("Above Median", " (Intercept=", round(lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent, data=abovemedian)$coef[1],1), ", Slope=", round(lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent, data=abovemedian)$coef[2],3), ")", sep=""),
         paste("Below Median", " (Intercept=", round(lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent, data=belowmedian)$coef[1],1), ", Slope=", round(lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent, data=belowmedian)$coef[2],3), ")", sep="")),
       bty="n", cex=1.5, pt.cex=c(1.5,1.3))

predict(lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent, data=belowmedian), newdata=data.frame(MainParent_Income_HH_MainParentAge45_49_pctparent=25))
predict(lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent, data=abovemedian), newdata=data.frame(MainParent_Income_HH_MainParentAge45_49_pctparent=25))

# Fot vetting bin counts:
below_counts <- belowmedian[, .(N=length(Child_Income_IND_30_34_pct)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"]; below_counts <- below_counts[order(MainParent_Income_HH_MainParentAge45_49_pctparent)]
above_counts <- abovemedian[, .(N=length(Child_Income_IND_30_34_pct)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"]; above_counts <- above_counts[order(MainParent_Income_HH_MainParentAge45_49_pctparent)]
fwrite(below_counts, "H:/Zheng_10223/ToVet/Supporting/Below_Counts.csv")
fwrite(above_counts, "H:/Zheng_10223/ToVet/Supporting/Above_Counts.csv")
